source code of /sitemap.py

Last modified
Lines 261

Parent directory Download CGIread sitemap Main page

Quick links: HTML XML contact content footer lastmod_changefreq main navigation pretty title write_sitemap

  1. #!/usr/bin/python2
  2. # -*- coding: UTF-8 -*-
  3. import compressout
  4. import os
  5. import time
  6. import cgitb
  7. cgitb.enable()
  8. import sitemapdata
  9. def lastmod_changefreq(filename, mod_compressout):
  10.     changefreq = [
  11.         ('hourly',  21600),     # 6 hours
  12.         ('daily',   259200),    # 3 days
  13.         ('weekly',  1209600),   # 2 weeks
  14.         ('monthly', 5184000),   # 60 days
  15.         ('yearly',  -1)             # Stop
  16.     ]
  17.     last_modifed = os.stat(filename).st_mtime
  18.     mod_compressout.write_b('    <lastmod>{}</lastmod>\n'.format(
  19.         time.strftime('%Y-%m-%d', time.gmtime(last_modifed))
  20.     ))
  21.     # Use highest frequency that is at most 2/T
  22.     # T is the time between the previous change and the next change.
  23.     # As the next change has not yet occurred, assume that
  24.     # now = previous + T/2, ie. right in the middle.
  25.     time_delta = time.time() - last_modifed
  26.     changefreq_kw = None
  27.     for keyword, limit in changefreq:
  28.         if limit/2.0 > time_delta:
  29.             break
  30.         changefreq_kw = keyword
  31.         if limit < 0:
  32.             break
  33.     mod_compressout.write_b(
  34.         '    <changefreq>{}</changefreq>\n'.format(keyword)
  35.     )
  36. def pretty(text):
  37.     text = text.replace('\n', ' ')
  38.     while 2*' ' in text:
  39.         text = text.replace(2*' ', 1*' ')
  40.     text = text.replace('&', '&amp;')
  41.     text = text.replace('<', '&lt;')
  42.     text = text.replace('>', '&gt;')
  43.     return text.strip()
  44. def XML():
  45.     compressout.write_h('Content-Type: application/xml; charset=UTF-8\n\n')
  46.     compressout.write_b('<?xml version="1.0" encoding="UTF-8"?>\n')
  47.     compressout.write_b(
  48.         '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"\n'
  49.     )
  50.     compressout.write_b(
  51.         'xmlns:i="http://www.google.com/schemas/sitemap-image/1.1"\n'
  52.     )
  53.     compressout.write_b(
  54.         'xmlns:v="http://www.google.com/schemas/sitemap-video/1.1"\n'
  55.     )
  56.     compressout.write_b('>\n\n')
  57.     # Flatten subsitemaps
  58.     for url in sitemapdata.sitemap:
  59.         if 'subsitemap' in url:
  60.             sitemapdata.sitemap.extend(url['subsitemap'])
  61.             del url['subsitemap']
  62.     for url in sitemapdata.sitemap:
  63.         compressout.write_b('<url>\n')
  64.         compressout.write_b('    <loc>{}</loc>\n'.format(
  65.             sitemapdata.site + url['URL']
  66.         ))
  67.         if 'priority' in url:
  68.             compressout.write_b('    <priority>{}</priority>\n'.format(
  69.                 url['priority']
  70.             ))
  71.         if 'file' in url:
  72.             lastmod_changefreq(url['file'], compressout)
  73.         if 'images' in url:
  74.             for image in url['images']:
  75.                 compressout.write_b('    <i:image>\n')
  76.                 compressout.write_b(
  77.                     '        <i:loc>{}</i:loc>\n'.format(
  78.                         sitemapdata.imgsite + image['URL']
  79.                     )
  80.                 )
  81.                 if 'description' in image:
  82.                     compressout.write_b(
  83.                         '        <i:caption>{}</i:caption>\n'.format(
  84.                             pretty(image['description'])
  85.                         )
  86.                     )
  87.                 # Image license.
  88.                 if 'license' in image:
  89.                     license = image['license']
  90.                 else:
  91.                     license = sitemapdata.imglicense
  92.                 if license is not None:
  93.                     compressout.write_b(
  94.                         '        <i:license>{}</i:license>\n'.format(
  95.                             license
  96.                         )
  97.                     )
  98.                 compressout.write_b('    </i:image>\n')
  99.         if 'videos' in url:
  100.             for video in url['videos']:
  101.                 compressout.write_b('    <v:video>\n')
  102.                 compressout.write_b(
  103.                     '        <v:title>{}</v:title>\n'.format(
  104.                         video['title']
  105.                     )
  106.                 )
  107.                 if 'content' in video:
  108.                     compressout.write_b(
  109.                         '        <v:content_loc>{}</v:content_loc>\n'.format(
  110.                             video['content']
  111.                         )
  112.                     )
  113.                 if 'player' in video:
  114.                     compressout.write_b(
  115.                         '        <v:player_loc>{}</v:player_loc>\n'.format(
  116.                             video['player']
  117.                         )
  118.                     )
  119.                 compressout.write_b(
  120.                     '        <v:description>{}</v:description>\n'.format(
  121.                         video['description']
  122.                     )
  123.                 )
  124.                 compressout.write_b(
  125.                     '        <v:thumbnail_loc>{}</v:thumbnail_loc>\n'.format(
  126.                         video['thumbnail']
  127.                     )
  128.                 )
  129.                 compressout.write_b('    </v:video>\n')
  130.         compressout.write_b('</url>\n')
  131.     compressout.write_b('\n</urlset>\n')
  132. def HTML():
  133.     if 'application/xhtml+xml' in os.getenv('HTTP_ACCEPT', ''):
  134.         content_type = 'application/xhtml+xml'
  135.     else:
  136.         content_type = 'text/html'
  137.     compressout.write_h('Content-Type: {}; charset=UTF-8\n\n'.format(
  138.         content_type
  139.     ))
  140.     compressout.write_b('''<!DOCTYPE html>
  141. <html lang="en" xmlns="http://www.w3.org/1999/xhtml">
  142.     <head>
  143.         <meta charset="utf-8"/>
  144.         <meta name="viewport" content="width=device-width, initial-scale=1"/>
  145.         <link rel="stylesheet" href="https://oskog97.com/style.css" type="text/css"/>
  146.         <link rel="icon" type="image/png" href="/favicon.png"/>
  147. <!-- End html5nc macro. -->
  148. ''')
  149.     compressout.write_b('''
  150.         <title>Sitemap</title>
  151.         <meta name="robots" content="noindex, follow"/>
  152.     </head>
  153.     <body>
  154.         
  155. <!-- BEGIN autogenerated navigation -->
  156. <nav><div id="navigation"><div id="nav_inner">
  157. <p><a href="#content" class="textonly">Skip navigation</a></p>
  158. <p class="row">
  159. <span class="textonly" translate="no">[</span><a class="head" href="/">Home</a><span class="textonly" translate="no">]</span>
  160. &gt;&gt;
  161. <span class="textonly" translate="no">[</span><a class="sub" href="/projects/anonymine/">Anonymine</a><span class="textonly" translate="no">]</span>
  162. <span class="textonly" translate="no">[</span><a class="sub" href="/projects/light-sensor/">Analog light sensor</a><span class="textonly" translate="no">]</span>
  163. <span class="textonly" translate="no">[</span><a class="sub" href="/projects/PLLM-M702A/">Reverse-engineered schematics for PLLM-M702A</a><span class="textonly" translate="no">]</span>
  164. <span class="textonly" translate="no">[</span><a class="sub" href="/small-scripts/">Small scripts</a><span class="textonly" translate="no">]</span>
  165. <span class="textonly" translate="no">[</span><a class="sub" href="/thinkpad/">-&gt; My IBM thinkpad</a><span class="textonly" translate="no">]</span>
  166. </p>
  167. <p class="row">
  168. <span class="textonly" translate="no">]</span><span class="sub active">Sitemap</span><span class="textonly" translate="no">[</span>
  169. </p>
  170. <hr class="textonly"/>
  171. </div></div></nav>
  172. <!-- END autogenerated navigation -->
  173.         <main><div id="content">
  174.             <h1 id="title">Sitemap</h1>
  175. ''')
  176.     def write_sitemap(sitemap):
  177.         compressout.write_b('<ul>\n')
  178.         for url in sitemap:
  179.             compressout.write_b('<li><a href="{}">{}</a>'.format(
  180.                 url['URL'].replace('&', '&amp;').replace('"', '&quot;'),
  181.                 pretty(url['description'])
  182.             ))
  183.             if 'subsitemap' in url:
  184.                 write_sitemap(url['subsitemap'])
  185.             compressout.write_b('</li>\n')
  186.         compressout.write_b('</ul>\n')
  187.     write_sitemap(sitemapdata.sitemap)
  188.     compressout.write_b('''
  189.             <h2>Other sitemaps</h2>
  190.             <ul>
  191.     ''')
  192.     for URL, name in sitemapdata.html_sitemaps:
  193.         compressout.write_b('<li><a href="{}">{}</a></li>\n'.format(
  194.             URL.replace('&', '&amp;').replace('"', '&quot;'),
  195.             pretty(name)
  196.         ))
  197.     compressout.write_b('''
  198.             </ul>
  199.         </div></main>
  200.         
  201. <!-- INCLUDED FOOTER -->
  202.     <hr class="textonly"/>
  203.     <p>
  204.         Copyright © Oskar Skog<br/>
  205.         Website content released under the <a
  206.         href="https://creativecommons.org/licenses/by/4.0/" rel="license noopener"
  207.         target="_blank">Creative Commons Attribution (CC-BY 4.0)</a> license
  208.         and my software usually under the <span class="a"><a target="_blank"
  209.         rel="noopener"
  210.         href="https://opensource.org/licenses/BSD-2-Clause">FreeBSD license
  211.         (2-clause)</a>.</span>
  212.         <br/>
  213.         Images may be from other sites, I should have cited useful sources
  214.         somewhere on the page.
  215.         <span class="notprint">Contact me if I haven't.</span>
  216.     </p>
  217.     <p id="contact" class="notprint">
  218.         You can contact me at: <a href="mailto:oskar@oskog97.com"
  219.         rel="noopener" target="_blank">oskar@oskog97.com</a>
  220.         <span class="a">(<a href="/pgp-pub/oskar.asc"
  221.                             >PGP public key</a>)</span>
  222.     </p>
  223.     <p> <a class="notprint" href="https://oskog97.com/read/?path=/style.css">
  224.             CSS Stylesheet
  225.         </a>
  226.         <a href="https://validator.w3.org/check/referrer" rel="nofollow noopener"
  227.             target="_blank" class="notprint"><span
  228.             class="img">Valid HTML5</span
  229.         ></a><br/>
  230.     </p>
  231. </div></footer>
  232. <!-- END OF INCLUDED FOOTER -->
  233.     </body>
  234. </html>
  235.     ''')
  236. def main():
  237.     os.chdir(sitemapdata.basedir)
  238.     compressout.init()
  239.     compressout.write_h('X-Robots-Tag: noindex, follow\n')
  240.     query_string = os.getenv('QUERY_STRING')
  241.     if query_string == 'xml':
  242.         XML()
  243.     else:
  244.         HTML()
  245.     compressout.done()
  246. if __name__ == '__main__':
  247.     main()